################################################################################
##
## Purpose: This script uses RSelenium to scrape information on interest groups from
##          https://justfacts.votesmart.org/interest-groups. It is included for 
##          reference, but is not part of the replication materials due to the 
##          reliance on an API key.
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
##  - Inputs:
##  - Outputs:
##    - ./data/prepped/demographics/IG_ratings_new.csv
##
################################################################################


# Start from a clean slate: remove every object in the current workspace and
# trigger a garbage collection before any scraping begins.
rm(list = ls())
gc()
# Attach dependencies. require() only warns and returns FALSE when a package is
# missing, so a missing package surfaces later as a "could not find function"
# error rather than at this point.
# NOTE(review): no RSelenium function is called in the visible part of this
# script (pages are fetched with read_html) -- confirm whether it is needed.
require(rvest)
require(RSelenium)
require(tidyverse)
require(xml2)

# NOTE(review): no random-number calls are visible below; the seed is
# presumably set by convention -- confirm.
set.seed(123)

# Compute details
# Log the run date plus a short description of the host machine and the R
# session, so output from this script can be tied to the environment that
# produced it. Everything is printed explicitly so the log looks the same
# whether the script is run with Rscript or via source().
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))

os_name <- Sys.info()[['sysname']]
if (os_name == 'Windows') {
  # wmic prints a header line first; drop/skip it to get the value itself.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == 'Linux') {
  # Sys.info() reports 'Linux' on Linux hosts; the previous comparison against
  # 'Linuxs' could never match, so this branch was unreachable.
  # One row per running rsession process: CPU %, memory %, pid and command.
  # NOTE(review): the fixed positions x[2], x[4], x[5] assume a particular
  # spacing of the ps output after splitting on single spaces -- confirm on
  # the target machine.
  splitted <- strsplit(system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE), " ")
  df <- do.call(rbind, lapply(splitted[-1],
                              function(x) data.frame(
                                cpu = as.numeric(x[2]),
                                mem = as.numeric(x[4]),
                                pid = as.numeric(x[5]),
                                cmd = paste(x[-c(1:5)], collapse = " "))))
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

print(sessionInfo())


# Landing page that lists the interest groups to be scraped.
# NOTE(review): the variable name suggests category 68 is the women's-issues
# listing -- confirm against the site.
url <- 'https://justfacts.votesmart.org/interest-groups/NA/68'
womens <- read_html(url)

# Listing parsed into data frames, one per '.sig-list-tbody' element.
# `tabs` is not referenced again in the visible part of the script.
tabs <- html_table(html_elements(womens, '.sig-list-tbody'))

# Child nodes of each listing body; the scraping loop walks these one by one.
nodes <- womens %>% html_nodes('.sig-list-tbody')
node_list <- nodes %>% xml_contents()

# Scrape ratings
# For every entry in `node_list`, follow the link found in its first cell,
# work out how many result pages that link has, and collect the
# '.interest-group-ratings-table' table from each page. Each scraped table is
# tagged with the link text (`year`) and the text of the link in the entry's
# fifth child (`IG`). All tables are stacked and written to disk at the end.
base_url <- 'https://justfacts.votesmart.org'

# Collect per-page tables in a list and bind once after the loop, instead of
# re-copying the accumulated data frame on every page.
rating_tables <- list()

# seq_along() yields no iterations for an empty listing, whereas
# 1:length(x) would iterate over c(1, 0) and fail on the first access.
for (i in seq_along(node_list)) {
  nd <- as_list(node_list[[i]])

  # Entries without a link in their first cell carry no ratings; skip them
  # before touching any other cell so a short row cannot raise an error.
  ratingLink <- attr(nd$td$a, 'href')
  if (is.null(ratingLink)) {
    next
  }
  yr <- nd$td$a[[1]]
  name <- nd[[5]]$a[[1]]

  # The page count is the largest number in the first distinct <h7> text.
  # Fall back to a single page when there is no <h7> element or it holds no
  # digits (the latter previously produced -Inf and broke the page loop).
  ratings <- read_html(paste0(base_url, ratingLink))
  pager_text <- unique(html_text(html_nodes(ratings, 'h7')))
  pages <- 1
  if (length(pager_text) > 0) {
    page_numbers <- as.numeric(str_extract_all(pager_text[1], '\\d+')[[1]])
    pages <- max(1, page_numbers)
  }

  for (p in seq_len(pages)) {
    ratings <- read_html(paste0(base_url, ratingLink, '?p=', p))
    rating_tables[[length(rating_tables) + 1]] <- ratings %>%
      html_element('.interest-group-ratings-table') %>%
      html_table() %>%
      mutate(year = yr,
             IG = name)
  }
}

IGratings <- bind_rows(rating_tables)

write.csv(IGratings,file = './data/prepped/demographics/IG_ratings_new.csv')

# EOF